- does parent SES cause better grades?
- r(gpa, ses) = .33, b = .41
- potential confound of peer relationships
- r(ses, peer) = .29
- r(gpa, peer) = .37
\[ \hat{Y} = b_{0} + b_{1}X_{1} + b_{2}X_{2}+...+b_{p}X_{p} \]
\[R^2 = \frac{SS_{regression}} {SS_{Y}} = \frac{s_{\hat{Y}}^2}{s_{Y}^2} \]
- Can be thought of as overlapping Venn diagrams
\[ sr = r_{y(1.2)} = \frac{r_{Y1}-r_{Y2}r_{12} }{\sqrt{1-r_{12}^2}} \] \[ sr^2 = R_{Y.12}^2 - r_{Y2}^2 \]
\[ pr = r_{y1.2} = \frac{r_{Y1}-r_{Y2}r_{12} }{\sqrt{1-r_{Y2}^2}\sqrt{1-r_{12}^2}} \]
\[ sr = r_{y(1.2)} = \frac{r_{Y1}-r_{Y2}r_{12} }{\sqrt{1-r_{12}^2}} \]
\[ pr^2 = \frac{R_{Y.12}^2 - r_{Y2}^2}{1-r_{Y2}^2} \]
\[ sr^2 = R_{Y.12}^2 - r_{Y2}^2 \]
\[ \hat{Y} = b_{0} + b_{1}X_{1} + b_{2}X_{2}+...+b_{p}X_{p} \]
- intercept is when all predictors = 0
- regression coefficients are partial regression coefficients
- predicted change in y for a 1 unit change in x, holding all other predictors constant
# Fit the two-predictor regression model and display the coefficient table
mr.model <- lm(Stress ~ Support + Anxiety, data = Multipleregression)
summary(mr.model)
## ## Call: ## lm(formula = Stress ~ Support + Anxiety, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.1958 -0.8994 -0.1370 0.9990 3.6995 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.31587 0.85596 -0.369 0.712792 ## Support 0.40618 0.05115 7.941 1.49e-12 *** ## Anxiety 0.25609 0.06740 3.799 0.000234 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.519 on 115 degrees of freedom ## Multiple R-squared: 0.3556, Adjusted R-squared: 0.3444 ## F-statistic: 31.73 on 2 and 115 DF, p-value: 1.062e-11
# Descriptive statistics for the outcome variable
library(psych)
describe(Multipleregression$Stress)
## vars n mean sd median trimmed mad min max range skew kurtosis ## X1 1 118 5.18 1.88 5.27 5.17 1.65 0.62 10.32 9.71 0.08 0.22 ## se ## X1 0.17
# Perspective (3-D surface) plot of the fitted regression plane
library(visreg)
visreg2d(mr.model, "Support", "Anxiety", plot.type = "persp")
\[b_{1}^* = \frac{r_{Y1}-r_{Y2}r_{12}}{1-r_{12}^2}\]
\[b_{2}^* = \frac{r_{Y2}-r_{Y1}r_{12}}{1-r_{12}^2}\]
\[b_{1}^* = \frac{r_{Y1}-r_{Y2}r_{12}}{1-r_{12}^2}\]
\[ sr = r_{y(1.2)} = \frac{r_{Y1}-r_{Y2}r_{12} }{\sqrt{1-r_{12}^2}} \]
\[b_{1} = b_{1}^*\frac{s_{Y}}{s_{X1}} \]
\[b_{1}^* = b_{1}\frac{s_{X1}}{s_{Y}} \]
\[b_{0} = \bar{Y} - b_{1}\bar{X_{1}} - b_{2}\bar{X_{2}} \]
\[ \hat{Y} = b_{0} + b_{1}X_{1} + b_{2}X_{2} \]
\[ R = \sqrt{b_{1}^*r_{Y1} + b_{2}^*r_{Y2}} \] \[ R^2 = {b_{1}^*r_{Y1} + b_{2}^*r_{Y2}} \]
same as before
\[ \frac{SS_{regression}}{SS_{Y}} = R^2 \] \[ {SS_{regression}} = R^2({SS_{Y})} \]
\[ {SS_{residual}} = (1- R^2){SS_{Y}} \]
\[R_{A}^2 = 1 - (1 -R^2)\frac{n-1}{n-p-1} \]
# Sequential (Type I) sums of squares for the two-predictor model
anova(mr.model)
## Analysis of Variance Table ## ## Response: Stress ## Df Sum Sq Mean Sq F value Pr(>F) ## Support 1 113.151 113.151 49.028 1.807e-10 *** ## Anxiety 1 33.314 33.314 14.435 0.0002336 *** ## Residuals 115 265.407 2.308 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table, R-squared, and overall F test for the fitted model
summary(mr.model)
## ## Call: ## lm(formula = Stress ~ Support + Anxiety, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.1958 -0.8994 -0.1370 0.9990 3.6995 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.31587 0.85596 -0.369 0.712792 ## Support 0.40618 0.05115 7.941 1.49e-12 *** ## Anxiety 0.25609 0.06740 3.799 0.000234 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.519 on 115 degrees of freedom ## Multiple R-squared: 0.3556, Adjusted R-squared: 0.3444 ## F-statistic: 31.73 on 2 and 115 DF, p-value: 1.062e-11
\[ H_{0}: \beta_{X}= 0 \] \[ H_{1}: \beta_{X} \neq 0 \]
\[ se_{b} = \frac{s_{Y}}{s_{X}}{\sqrt{\frac {1-r_{xy}^2}{n-2}}} \]
\[ se_{b} = \frac{s_{Y}}{s_{X}}{\sqrt{\frac {1-R_{Y\hat{Y}}^2}{n-p-1}}} \sqrt{\frac {1}{1-R_{i.jkl...p}^2}}\]
\[ se_{b} = \frac{s_{Y}}{s_{X}}{\sqrt{\frac {1-R_{Y\hat{Y}}^2}{n-p-1}}} \sqrt{\frac {1}{1-R_{i.jkl...p}^2}}\]
- the variance in X_i that cannot be explained by the other predictors
- Large tolerance (little overlap) means standard error will be small.
- what does this mean for including a lot of variables in your model?
# Nested model comparison: does adding Anxiety improve fit over Support alone?
m.1 <- lm(Stress ~ Support, data = Multipleregression)
m.2 <- lm(Stress ~ Support + Anxiety, data = Multipleregression)
anova(m.1, m.2)
## Analysis of Variance Table ## ## Model 1: Stress ~ Support ## Model 2: Stress ~ Support + Anxiety ## Res.Df RSS Df Sum of Sq F Pr(>F) ## 1 116 298.72 ## 2 115 265.41 1 33.314 14.435 0.0002336 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the one-predictor model
anova(m.1)
## Analysis of Variance Table ## ## Response: Stress ## Df Sum Sq Mean Sq F value Pr(>F) ## Support 1 113.15 113.151 43.939 1.12e-09 *** ## Residuals 116 298.72 2.575 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the two-predictor model; the Anxiety row matches the
# model-comparison F from anova(m.1, m.2)
anova(m.2)
## Analysis of Variance Table ## ## Response: Stress ## Df Sum Sq Mean Sq F value Pr(>F) ## Support 1 113.151 113.151 49.028 1.807e-10 *** ## Anxiety 1 33.314 33.314 14.435 0.0002336 *** ## Residuals 115 265.407 2.308 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ## Call: ## lm(formula = Stress ~ Support + Anxiety, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.1958 -0.8994 -0.1370 0.9990 3.6995 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.31587 0.85596 -0.369 0.712792 ## Support 0.40618 0.05115 7.941 1.49e-12 *** ## Anxiety 0.25609 0.06740 3.799 0.000234 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.519 on 115 degrees of freedom ## Multiple R-squared: 0.3556, Adjusted R-squared: 0.3444 ## F-statistic: 31.73 on 2 and 115 DF, p-value: 1.062e-11
## ## Call: ## lm(formula = Stress ~ Support, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -3.8215 -1.2145 -0.1796 1.0806 3.4326 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 2.56046 0.42189 6.069 1.66e-08 *** ## Support 0.30006 0.04527 6.629 1.12e-09 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.605 on 116 degrees of freedom ## Multiple R-squared: 0.2747, Adjusted R-squared: 0.2685 ## F-statistic: 43.94 on 1 and 116 DF, p-value: 1.12e-09
# Compare the intercept-only (null) model against the one-predictor model
m.0 <- lm(Stress ~ 1, data = Multipleregression)
m.1 <- lm(Stress ~ Support, data = Multipleregression)
anova(m.0, m.1)
## Analysis of Variance Table ## ## Model 1: Stress ~ 1 ## Model 2: Stress ~ Support ## Res.Df RSS Df Sum of Sq F Pr(>F) ## 1 117 411.87 ## 2 116 298.72 1 113.15 43.939 1.12e-09 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\[R_{Y.1234...p}^2 = r_{Y1}^2 + r_{Y(2.1)}^2 + r_{Y(3.21)}^2 + r_{Y(4.321)}^2 + ... \]
# Confirm the grouping variable is stored as a factor
class(one.way$group)
## [1] "factor"
# Cell sizes for each group
table(one.way$group)
## ## control tx1 tx2 ## 45 150 58
# Regress drug use on the three-level group factor (dummy coded by default)
model.1 <- lm(drugs ~ group, data = one.way)
summary(model.1)
## ## Call: ## lm(formula = drugs ~ group, data = one.way) ## ## Residuals: ## Min 1Q Median 3Q Max ## -0.5015 -0.1524 -0.0613 -0.0613 4.0619 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 0.32159 0.08964 3.588 0.000402 *** ## grouptx1 -0.42402 0.10236 -4.142 4.73e-05 *** ## grouptx2 -0.33289 0.11991 -2.776 0.005923 ** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.6013 on 247 degrees of freedom ## (3 observations deleted due to missingness) ## Multiple R-squared: 0.06498, Adjusted R-squared: 0.05741 ## F-statistic: 8.583 on 2 and 247 DF, p-value: 0.0002491
For every nominal/categorical variable with more than 2 levels, R by default automatically creates L - 1 dummy variables (where L is the number of levels)
Each of these dummy variables consists of 0s and 1s just as before, except that one group (the reference group) is coded as zero on all of them
The interpretation of each coefficient is the difference between the group coded 1 and the reference group
# Group means of drugs (NAs removed); these reproduce the dummy-coded
# coefficients: intercept = reference-group mean, slopes = mean differences
library(dplyr)
(one.way %>%
   filter(!is.na(drugs)) %>%
   group_by(group) %>%
   summarise(mean(drugs)))
## # A tibble: 3 x 2 ## group `mean(drugs)` ## <fct> <dbl> ## 1 control 0.322 ## 2 tx1 -0.102 ## 3 tx2 -0.0113
# Default treatment (dummy) coding: control is the reference group
contrasts(one.way$group)
## tx1 tx2 ## control 0 0 ## tx1 1 0 ## tx2 0 1
# Can see the same with only 2 levels: a single dummy column
contrasts(Multipleregression$group)
## Tx ## Control 0 ## Tx 1
# Factor levels in their current order; the first level is the reference
levels(one.way$group)
## [1] "control" "tx1" "tx2"
# Re-level the factor so tx2 becomes the reference category
one.way$group.2 <- relevel(one.way$group, "tx2")
levels(one.way$group.2)
## [1] "tx2" "control" "tx1"
# Refit with the re-leveled factor; coefficients now contrast against tx2
model.2 <- lm(drugs ~ group.2, data = one.way)
tidy(summary(model.2))
## term estimate std.error statistic p.value ## 1 (Intercept) -0.01129146 0.07964706 -0.1417687 0.887378302 ## 2 group.2control 0.33288569 0.11991225 2.7760774 0.005923213 ## 3 group.2tx1 -0.09112942 0.09373803 -0.9721713 0.331916401
# Dummy coding for the re-leveled factor: tx2 is now the reference group
contrasts(one.way$group.2)
## control tx1 ## tx2 0 0 ## control 1 0 ## tx1 0 1
## dummy variables via: contr.treatment(4)
## 2 3 4 ## 1 0 0 0 ## 2 1 0 0 ## 3 0 1 0 ## 4 0 0 1
## effect coding via: contr.sum(4)
## [,1] [,2] [,3] ## 1 1 0 0 ## 2 0 1 0 ## 3 0 0 1 ## 4 -1 -1 -1
# Effect (sum-to-zero) coding matrix for a 3-level factor
contr.sum(3)
## [,1] [,2] ## 1 1 0 ## 2 0 1 ## 3 -1 -1
# Switch the factor to effect coding and refit
contrasts(one.way$group) <- contr.sum(3)
model.3 <- lm(drugs ~ group, data = one.way)
tidy(model.3)
## term estimate std.error statistic p.value ## 1 (Intercept) 0.06929397 0.04323336 1.602789 0.1102591875 ## 2 group1 0.25230027 0.06743556 3.741353 0.0002276286 ## 3 group2 -0.17171484 0.05180262 -3.314791 0.0010548126
# Descriptive statistics for the outcome in the one-way data
library(psych)
describe(one.way$drugs)
## vars n mean sd median trimmed mad min max range skew kurtosis ## X1 1 250 -0.01 0.62 -0.16 -0.15 0 -0.18 4.38 4.56 5.11 26.82 ## se ## X1 0.04
# Group sizes again, for reference alongside the effect-coded model
table(one.way$group)
## ## control tx1 tx2 ## 45 150 58
# Omnibus F test; it is unchanged by the choice of coding scheme
anova(model.3)
## Analysis of Variance Table ## ## Response: drugs ## Df Sum Sq Mean Sq F value Pr(>F) ## group 2 6.207 3.10337 8.5826 0.0002491 *** ## Residuals 247 89.312 0.36159 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Dummy coding with level 2 (tx1) as the reference group
contrasts(one.way$group) <- contr.treatment(3, base = 2)
model.4 <- lm(drugs ~ group, data = one.way)
tidy(model.4)
## term estimate std.error statistic p.value ## 1 (Intercept) -0.10242087 0.04942837 -2.0721070 3.929334e-02 ## 2 group1 0.42401511 0.10236434 4.1422150 4.726216e-05 ## 3 group3 0.09112942 0.09373803 0.9721713 3.319164e-01
# Dummy coding with level 3 (tx2) as the reference group
contrasts(one.way$group) <- contr.treatment(3, base = 3)
model.5 <- lm(drugs ~ group, data = one.way)
tidy(model.5)
## term estimate std.error statistic p.value ## 1 (Intercept) -0.01129146 0.07964706 -0.1417687 0.887378302 ## 2 group1 0.33288569 0.11991225 2.7760774 0.005923213 ## 3 group2 -0.09112942 0.09373803 -0.9721713 0.331916401
# Add a continuous covariate alongside the group factor
model.6 <- lm(drugs ~ group + alcohol, data = one.way)
tidy(model.6)
## term estimate std.error statistic p.value ## 1 (Intercept) 0.001765073 0.07620610 0.02316183 9.815400e-01 ## 2 group1 0.245209591 0.11604236 2.11310413 3.559930e-02 ## 3 group2 -0.089204297 0.08963461 -0.99519930 3.206172e-01 ## 4 alcohol 0.206088046 0.04194726 4.91302764 1.637456e-06